* Session setup: turn off -more- pauses so the script runs unattended,
* close any log that is still open (capture swallows the error raised
* when no log is open), and drop whatever data are currently in memory.
set more off
capture log close

clear

/* ******************************************************************************************
Program to read in 2003 and 2004 TWC data.
Keeps only one record from duplicates (duplicate if ssn, employer, wage are all the same)
****************************************************************************************** */

* Root directory holding the raw TWC receipts. It is built from global d1,
* which is not defined in this file (presumably set by the caller and ending
* in a path separator -- confirm).
* Earlier hard-coded location: /mnt/data/tsp1/srcdata/receipts/twc
global twcrcpts "${d1}twc"

* Raw-file stems, one per year/quarter, named twcYY_Q. Each stem is relative
* to the receipts root and omits the .UTD extension.

* ---- 2003 ----
global twc03_1 "twc_05-01/C4W20031"

* Note: the note in this directory says the 2002 file has been superseded,
* but the file in the 05-03 folder really has data from 2003.
global twc03_2 "twc_05-01/C4W20032"

* Formerly pointed at twc_05-03/C4W20032 (which held q3 data although its
* label said q2). That file does not exist; twc_05-03/C4W20033 is used
* instead and produces the same results as the old do file.
global twc03_3 "twc_05-03/C4W20033"

global twc03_4 "twc_06-03/C4W20034"

* ---- 2004 ----
global twc04_1 "twc_06-03/C4W20041"
global twc04_2 "twc_06-03/C4W20042"

* NOTE(review): this stem lacks the W that every other stem has after C4;
* confirm it matches the file name on disk.
global twc04_3 "twc_06-03-a/C420043"

* Note: the 2004 q4 data are known to be problematic.
global twc04_4 "twc_06-06/C4W20044"


# delimit ;

/* From here on commands end with a semicolon.
   First pass: for every year (2003, 2004) and quarter (1-4), print a
   heading and list the corresponding raw .UTD file. */
forvalues y = 3/4 {;
    forvalues q = 1/4 {;
        display "Infiling Year=200`y' Quarter=`q'";
        dir $twcrcpts/${twc0`y'_`q'}.UTD;
    };
};

/* Main pass: for every year (2003, 2004) and quarter (1-4)
     1. read the fixed-width raw file,
     2. report record counts and data-quality diagnostics,
     3. keep only records whose year/quarter fields match the file,
     4. convert wages to dollars,
     5. keep one record per ssn-employer-wage combination,
     6. save the result as C4w200Y_Q under the d1 data/twc directory.
   Commands are semicolon-terminated (delimiter set earlier in this file). */
forvalues y=3/4 {;
 forvalues q=1/4 {;

  disp " ";
  disp "Infiling Year=200`y' Quarter=`q'";

  /* Column layout of the raw record. The path is quoted so that a
     directory name containing blanks does not break the command. */
  qui infix
	str9 altpid	1-9
	str9 twcaccts	10-18
	int years	19-22
	byte quarters	23
	long wagess	24-30
	using "$twcrcpts/${twc0`y'_`q'}.UTD", clear;

  qui count;
  disp "Number of records: " r(N);

  /* real() returns missing for blank or non-numeric strings. The share is
     multiplied by 100 so the number agrees with the percent sign in the label. */
  qui count if real(altpid)==.;
  disp "% Missing or non-numeric ssn: " 100*r(N)/_N;

  /* Records whose year/quarter fields disagree with the file being read
     are counted and then dropped. */
  qui count if (years!=2000+`y') | (quarters!=`q');
  disp "Number wrong quarter or year: " r(N);
  qui keep if (years==2000+`y') & (quarters==`q');
  drop quarters years;

  /* Convert to dollars. The full variable name wagess is used throughout
     so the code does not depend on variable-name abbreviation being on. */
  qui replace wagess=wagess/100;

  /* Keep only one record for each ssn-employer-wage group
     (the others are assumed to be duplicates). */
  bysort altpid twcaccts wagess: keep if _n==1;
  drop twcaccts;

  summ wagess;

  qui compress;

  /* Earlier save location, kept for reference:
     fmartorell_home global followed by data/twc/C4w200Y_Q */
  save "${d1}data/twc/C4w200`y'_`q'", replace;
 };
};


